EuroCD 3

home *** CD-ROM | disk | FTP | other *** search

/ EuroCD 3 / EuroCD 3.iso / Games / Doom / ADoom-0.8 / ADoom_src / amiga_draw.s < prev next >

Wrap

Text File | 1998-06-24 | 37KB | 1,605 lines

*
* amiga_draw.s - optimized rendering
* by Aki Laukkanen <amlaukka@cc.helsinki.fi>
*
* This file is public domain.
*

;               mc68020
;               multipass
;       if (_eval(DEBUG)&$8000)
;               debug   on,lattice4
;       endc

        include "exec/types.i"

;-----------------------------------------------------------------------

SCREENWIDTH     equ     320
FUZZTABLE       equ     250
FUZZOFF         equ     SCREENWIDTH
FRACBITS        equ     16
FRACUNIT        equ     (1<<FRACBITS)

*
* global functions
*

;;      xdef    _R_DrawColumn_030       ; high detail
;;      xdef    @R_DrawColumn_030
        xdef    _R_DrawColumn_040       ; high detail
        xdef    @R_DrawColumn_040
        xdef    _R_DrawSpan_040
        xdef    @R_DrawSpan_040
        xdef    _R_DrawColumn_060
        xdef    @R_DrawColumn_060
        xdef    _R_DrawSpan_060
        xdef    @R_DrawSpan_060
        xdef    _R_DrawFuzzColumn
        xdef    @R_DrawFuzzColumn
;;      xdef    _R_DrawTranslatedColumn
;;      xdef    @R_DrawTranslatedColumn

        xdef    _R_DrawSpanLow          ; low detail
        xdef    @R_DrawSpanLow
        xdef    _R_DrawColumnLow
        xdef    @R_DrawColumnLow
        xdef    _R_DrawFuzzColumnLow
        xdef    @R_DrawFuzzColumnLow
;;      xdef    _R_DrawTranslatedColumnLow
;;      xdef    @R_DrawTranslatedColumnLow

        xdef    _R_RenderSegLoop
        xdef    @R_RenderSegLoop

*
* needed symbols/labels
*

        xref    _dc_yl
        xref    _dc_yh
        xref    _dc_x
        xref    _columnofs
        xref    _ylookup
        xref    _dc_iscale
        xref    _centery
        xref    _dc_texturemid
        xref    _dc_source
        xref    _dc_colormap
        xref    _ds_xfrac
        xref    _ds_yfrac
        xref    _ds_x1
        xref    _ds_y
        xref    _ds_x2
        xref    _ds_xstep
        xref    _ds_ystep
        xref    _ds_source
        xref    _ds_colormap
        xref    _fuzzoffset
        xref    _fuzzpos
        xref    _viewheight
        xref    _dc_translation
        xref    _colormaps

;-----------------------------------------------------------------------
        section text,code

; low detail drawing functions

;-----------------------------------------------------------------------
; R_DrawColumnLow - draw one textured column, each texel stored as a
; word (two identical bytes) so that it covers two screen pixels.
; In:    a4 = base for the a4-relative dc_* / ylookup / columnofs data
; Saves: d3-d4/d6-d7/a2/a3.  Uses d0/d1/a0/a1 as scratch.
; The loop is unrolled four times; the first pass enters part-way
; through the unrolled body via .tmap_tab1 to handle (count & 3).
        cnop    0,4

_R_DrawColumnLow
@R_DrawColumnLow
        movem.l d3-d4/d6-d7/a2/a3,-(sp)

        move.l  _dc_yh(a4),d7           ; count = _dc_yh - _dc_yl
        move.l  _dc_yl(a4),d0
        sub.l   d0,d7
        bmi.w   .end1

        move.l  _dc_x(a4),d1            ; dest = ylookup[_dc_yl] + columnofs[_dc_x]
        lea     _ylookup(a4),a0
        add.l   d1,d1                   ; dc_x <<= 1
        move.l  (a0,d0.l*4),a0
        lea     _columnofs(a4),a1
        add.l   (a1,d1.l*4),a0

        move.l  _dc_colormap(a4),d4
        move.l  _dc_source(a4),a1

        move.l  _dc_iscale(a4),d1       ; frac = _dc_texturemid + (_dc_yl-centery)*fracstep
        sub.l   _centery(a4),d0
        muls.l  d1,d0
        add.l   _dc_texturemid(a4),d0

        moveq   #$7f,d3
        lea     (SCREENWIDTH*4).w,a3

; d7: cnt >> 2
; a0: chunky
; a1: texture
; d0: frac  (uuuu uuuu uuuu uuuu 0000 0000 0UUU UUUU)
; d1: dfrac (.......................................)
; d3: $7f
; d4: light table aligned to 256 byte boundary
; a3: SCREENWIDTH*4

        move.l  d7,d6
        and.w   #3,d6

        swap    d0                      ; swap decimals and fraction
        swap    d1

        add.w   .width_tab1(pc,d6.w*2),a0
        lsr.w   #2,d7
        move.w  .tmap_tab1(pc,d6.w*2),d6

        and.w   d3,d0
        sub.w   d1,d0
        add.l   d1,d0                   ; setup the X flag

        jmp     .loop1(pc,d6.w)

        cnop    0,4
.width_tab1
        dc.w    -3*SCREENWIDTH
        dc.w    -2*SCREENWIDTH
        dc.w    -1*SCREENWIDTH
        dc.w    0

.tmap_tab1
        dc.w    .01-.loop1
        dc.w    .11-.loop1
        dc.w    .21-.loop1
        dc.w    .31-.loop1

.loop1
.31
        move.b  (a1,d0.w),d4
        addx.l  d1,d0
        move.l  d4,a2
        move.w  (a2),d6
        and.w   d3,d0
        move.b  (a2),d6                 ; d6.w = colour byte duplicated
        move.w  d6,(a0)
.21
        move.b  (a1,d0.w),d4
        addx.l  d1,d0
        move.l  d4,a2
        move.w  (a2),d6
        and.w   d3,d0
        move.b  (a2),d6
        move.w  d6,SCREENWIDTH(a0)
.11
        move.b  (a1,d0.w),d4
        addx.l  d1,d0
        move.l  d4,a2
        move.w  (a2),d6
        and.w   d3,d0
        move.b  (a2),d6
        move.w  d6,SCREENWIDTH*2(a0)
.01
        move.b  (a1,d0.w),d4
        addx.l  d1,d0
        move.l  d4,a2
        move.w  (a2),d6
        and.w   d3,d0
        move.b  (a2),d6
        move.w  d6,SCREENWIDTH*3(a0)

        add.l   a3,a0
.loop_end1
        dbf     d7,.loop1
.end1
        movem.l (sp)+,d3-d4/d6-d7/a2/a3
        rts

;-----------------------------------------------------------------------
; R_DrawSpanLow - draw one horizontal span, each texel written twice.
; In:    a4 = base for the a4-relative ds_* / ylookup / columnofs data
; Saves: d2-d7/a2-a4 (a4 is reused as an end pointer once all globals
;        have been read).
; The texture index is (y & 63) << 6 | (x & 63); integer and fraction
; halves of x/y are interleaved across d0/d1 so that a pair of addx.l
; instructions steps both coordinates, passing carries through X.
        cnop    0,4

_R_DrawSpanLow
@R_DrawSpanLow
        movem.l d2-d7/a2-a4,-(sp)

        move.l  _ds_y(a4),d0
        move.l  _ds_x1(a4),d1           ; dest = ylookup[_ds_y] + columnofs[_ds_x1]
        lea     _ylookup(a4),a0
        add.l   d1,d1
        move.l  (a0,d0.l*4),a0
        lea     _columnofs(a4),a1
        add.l   (a1,d1.l*4),a0

        move.l  _ds_x2(a4),d7           ; count = _ds_x2 - _ds_x1
        move.l  _ds_source(a4),a1
        add.l   d7,d7
        move.l  _ds_colormap(a4),a2
        sub.l   d1,d7
        addq.l  #2,d7                   ; d7 = number of bytes to write

        move.l  _ds_xfrac(a4),d0
        move.l  _ds_yfrac(a4),d1
        move.l  _ds_xstep(a4),d2
        move.l  _ds_ystep(a4),d3

        move.l  a0,d4                   ; notice, that this address must already be aligned by word
        btst    #1,d4
        beq.b   .skips2

        move.l  d0,d5                   ; do the unaligned pixels
        move.l  d1,d6                   ; so we can write to longword
        swap    d5                      ; boundary in the main loop
        swap    d6
        and.w   #$3f,d5
        and.w   #$3f,d6                 ; this is the worst possible
        lsl.w   #6,d6                   ; way but hey, this is not a loop
        or.w    d5,d6
        move.b  (a1,d6.w),d5
        add.l   d2,d0
        move.b  (a2,d5.w),(a0)+
        add.l   d3,d1
        move.b  (a2,d5.w),(a0)+         ; I know this is crap but spare me the comments
        subq.l  #2,d7
.skips2
        move.l  a2,d4
        lea     $1000(a1),a1            ; catch 22
        move.l  a0,a3
        add.l   d7,a3                   ; a3 = end of span
        move.l  d7,d5
        and.b   #~7,d5
        move.l  a0,a4
        add.l   d5,a4                   ; a4 = end of the 8-byte main loop

        eor.w   d0,d1                   ; swap fraction parts for addx
        eor.w   d2,d3
        eor.w   d1,d0
        eor.w   d3,d2
        eor.w   d0,d1
        eor.w   d2,d3
        swap    d0
        swap    d1
        swap    d2
        swap    d3
        lsl.w   #6,d1
        lsl.w   #6,d3
        move.w  #$ffc0,d6
        move.w  #$f03f,d7

        lsr.w   #3,d5
        beq.b   .skip_loop22
        sub.w   d2,d0
        add.l   d2,d0                   ; setup the X flag
.loop22
        or.w    d6,d0                   ; Not really an exercise in optimizing
        or.w    d7,d1                   ; but I guess it's faster than 1x1 for 030
        and.w   d1,d0                   ; where this low detail business is needed.
        addx.l  d3,d1
        move.b  (a1,d0.w),d4
        addx.l  d2,d0
        move.l  d4,a2
        move.w  (a2),d5
        or.w    d6,d0
        move.b  (a2),d5
        or.w    d7,d1
        and.w   d1,d0
        swap    d5
        addx.l  d3,d1
        move.b  (a1,d0.w),d4
        addx.l  d2,d0
        move.l  d4,a2
        move.w  (a2),d5
        or.w    d6,d0
        move.b  (a2),d5
        or.w    d7,d1
        and.w   d1,d0
        move.l  d5,(a0)+
        addx.l  d3,d1
        move.b  (a1,d0.w),d4
        addx.l  d2,d0
        move.l  d4,a2
        move.w  (a2),d5
        or.w    d6,d0
        move.b  (a2),d5
        or.w    d7,d1
        and.w   d1,d0
        swap    d5
        addx.l  d3,d1
        move.b  (a1,d0.w),d4
        addx.l  d2,d0
        move.l  d4,a2
        move.w  (a2),d5
        move.b  (a2),d5
        move.l  d5,(a0)+
        cmp.l   a0,a4
        bne.b   .loop22
        ; X still holds the carry of the last addx.l d2,d0 (move and
        ; cmpa leave X alone), so the tail loop must not prime again:
        ; a second priming would step the fraction in d0 twice.
        bra.b   .loop_end22
.skip_loop22
        sub.w   d2,d0                   ; main loop did not run:
        add.l   d2,d0                   ; setup the X flag here
        bra.b   .loop_end22
.loop32
        or.w    d6,d0
        or.w    d7,d1
        and.w   d1,d0
        addx.l  d3,d1
        move.b  (a1,d0.w),d4
        addx.l  d2,d0
        move.l  d4,a2
        move.b  (a2),(a0)+
        move.b  (a2),(a0)+
.loop_end22
        cmp.l   a0,a3
        bne.b   .loop32
.end22
        movem.l (sp)+,d2-d7/a2-a4
        rts

;-----------------------------------------------------------------------
; R_DrawTranslatedColumnLow - as R_DrawColumnLow, but each texel first
; goes through the dc_translation table and then through dc_colormap.
; In:    a4 = base for the a4-relative dc_* / ylookup / columnofs data
; Saves: d2-d4/d6-d7/a2/a3.
; Not exported: the matching xdef lines above are commented out.
        cnop    0,4

_R_DrawTranslatedColumnLow
@R_DrawTranslatedColumnLow
        movem.l d2-d4/d6-d7/a2/a3,-(sp)

        move.l  _dc_yh(a4),d7           ; count = _dc_yh - _dc_yl
        move.l  _dc_yl(a4),d0
        sub.l   d0,d7
        bmi.w   .end3

        move.l  _dc_x(a4),d1            ; dest = ylookup[_dc_yl] + columnofs[_dc_x]
        lea     _ylookup(a4),a0
        add.l   d1,d1
        move.l  (a0,d0.l*4),a0
        lea     _columnofs(a4),a1
        add.l   (a1,d1.l*4),a0

        move.l  _dc_translation(a4),d2
        move.l  _dc_colormap(a4),d4
        move.l  _dc_source(a4),a1

        move.l  _dc_iscale(a4),d1       ; frac = _dc_texturemid + (_dc_yl-centery)*fracstep
        sub.l   _centery(a4),d0
        muls.l  d1,d0
        add.l   _dc_texturemid(a4),d0

        moveq   #$7f,d3
        lea     (SCREENWIDTH*4).w,a3

; d7: cnt >> 2
; a0: chunky
; a1: texture
; d0: frac  (uuuu uuuu uuuu uuuu 0000 0000 0UUU UUUU)
; d1: dfrac (.......................................)
; d3: $7f
; d4: light table aligned to 256 byte boundary
; d2: translation table aligned to 256 byte boundary
; a3: SCREENWIDTH*4

        move.l  d7,d6
        and.w   #3,d6

        swap    d0                      ; swap decimals and fraction
        swap    d1

        add.w   .width_tab3(pc,d6.w*2),a0
        lsr.w   #2,d7
        move.w  .tmap_tab3(pc,d6.w*2),d6

        and.w   d3,d0
        sub.w   d1,d0
        add.l   d1,d0                   ; setup the X flag

        jmp     .loop3(pc,d6.w)

        cnop    0,4
.width_tab3
        dc.w    -3*SCREENWIDTH
        dc.w    -2*SCREENWIDTH
        dc.w    -1*SCREENWIDTH
        dc.w    0

.tmap_tab3
        dc.w    .03-.loop3
        dc.w    .13-.loop3
        dc.w    .23-.loop3
        dc.w    .33-.loop3

.loop3
.33
        move.b  (a1,d0.w),d2
        move.l  d2,a2
        addx.l  d1,d0
        move.b  (a2),d4
        move.l  d4,a2
        and.w   d3,d0
        move.w  (a2),d6
        move.b  (a2),d6
        move.w  d6,(a0)
.23
        move.b  (a1,d0.w),d2
        move.l  d2,a2
        addx.l  d1,d0
        move.b  (a2),d4
        move.l  d4,a2
        and.w   d3,d0
        move.w  (a2),d6
        move.b  (a2),d6
        move.w  d6,SCREENWIDTH(a0)
.13
        move.b  (a1,d0.w),d2
        move.l  d2,a2
        addx.l  d1,d0
        move.b  (a2),d4
        move.l  d4,a2
        and.w   d3,d0
        move.w  (a2),d6
        move.b  (a2),d6
        move.w  d6,SCREENWIDTH*2(a0)
.03
        move.b  (a1,d0.w),d2
        move.l  d2,a2
        addx.l  d1,d0
        move.b  (a2),d4
        move.l  d4,a2
        and.w   d3,d0
        move.w  (a2),d6
        move.b  (a2),d6
        move.w  d6,SCREENWIDTH*3(a0)    ; word store like the other three
                                        ; slots (was move.b: only half of
                                        ; the doubled pixel was written)

        add.l   a3,a0
.loop_end3
        dbf     d7,.loop3
.end3
        movem.l (sp)+,d2-d4/d6-d7/a2/a3
        rts

;-----------------------------------------------------------------------
; R_DrawFuzzColumnLow - low detail "fuzz" column: every destination
; pixel is replaced by colormaps[6*256 + screen pixel at dest+offset],
; the offset coming from the fuzzoffset table.  Written as a word.
; In:    a4 = base for the a4-relative data
; Saves: d4/d6-d7/a2/a3.
; dc_yl is raised to 1 when it is 0 and dc_yh is lowered to
; viewheight-2 when it equals viewheight-1.
; NOTE(review): fuzzpos is used here as a byte offset into fuzzoffset
; (it is added unscaled and written back as an address difference).
        cnop    0,4

_R_DrawFuzzColumnLow
@R_DrawFuzzColumnLow
        movem.l d4/d6-d7/a2/a3,-(sp)

        move.l  _viewheight(a4),d1
        subq.l  #1,d1
        move.l  _dc_yh(a4),d7           ; count = _dc_yh - _dc_yl
        cmp.l   d1,d7
        bne.b   .skip_yh4
        subq.l  #1,d1
        move.l  d1,d7
.skip_yh4
        move.l  _dc_yl(a4),d0
        bne.b   .skip_yl4
        moveq   #1,d0
.skip_yl4
        sub.l   d0,d7
        bmi.w   .end4

        move.l  _dc_x(a4),d1            ; dest = ylookup[_dc_yl] + columnofs[_dc_x]
        lea     _ylookup(a4),a0
        add.l   d1,d1
        move.l  (a0,d0.l*4),a0
        lea     _columnofs(a4),a1
        add.l   (a1,d1.l*4),a0

        move.l  _colormaps(a4),d4
        add.l   #6*256,d4

        lea     _fuzzoffset(a4),a1
        move.l  _fuzzpos(a4),d0         ; bring it down
.pos_loop4
        sub.w   #200,d0
        bpl     .pos_loop4
        add.w   #200,d0
        add.l   d0,a1

        lea     (SCREENWIDTH*4).w,a3

; d7: cnt >> 2
; a0: chunky
; a1: fuzzoffset
; d4: light table aligned to 256 byte boundary
; a3: SCREENWIDTH*4

        move.l  d7,d6
        and.w   #3,d6

        add.w   .width_tab4(pc,d6.w*2),a0
        lsr.w   #2,d7
        move.w  .tmap_tab4(pc,d6.w*2),d6

        jmp     .loop4(pc,d6.w)

        cnop    0,4
.width_tab4
        dc.w    -3*SCREENWIDTH
        dc.w    -2*SCREENWIDTH
        dc.w    -1*SCREENWIDTH
        dc.w    0

.tmap_tab4
        dc.w    .04-.loop4
        dc.w    .14-.loop4
        dc.w    .24-.loop4
        dc.w    .34-.loop4

.loop4
.34
        move.l  a0,a2                   ; This is essentially
        add.l   (a1)+,a2                ; just moving memory around.
        move.b  (a2),d4
        move.l  d4,a2
        move.w  (a2),d6
        move.b  (a2),d6
        move.w  d6,(a0)
.24
        lea     SCREENWIDTH(a0),a2
        add.l   (a1)+,a2
        move.b  (a2),d4
        move.l  d4,a2
        move.w  (a2),d6
        move.b  (a2),d6
        move.w  d6,SCREENWIDTH(a0)
.14
        lea     2*SCREENWIDTH(a0),a2
        add.l   (a1)+,a2
        move.b  (a2),d4
        move.l  d4,a2
        move.w  (a2),d6
        move.b  (a2),d6
        move.w  d6,2*SCREENWIDTH(a0)
.04
        lea     3*SCREENWIDTH(a0),a2
        add.l   (a1)+,a2
        move.b  (a2),d4
        move.l  d4,a2
        move.w  (a2),d6
        move.b  (a2),d6
        move.w  d6,3*SCREENWIDTH(a0)

        add.l   a3,a0
.loop_end4
        dbf     d7,.loop4
        sub.l   #_fuzzoffset,a1
        move.l  a1,_fuzzpos
.end4
        movem.l (sp)+,d4/d6-d7/a2/a3
        rts

;-----------------------------------------------------------------------
; high detail versions
;-----------------------------------------------------------------------
; R_DrawFuzzColumn - high detail "fuzz" column; same scheme as
; R_DrawFuzzColumnLow but dc_x is not doubled and one byte is stored
; per pixel.
; In:    a4 = base for the a4-relative data
; Saves: d4/d6-d7/a2/a3.
        cnop    0,4

_R_DrawFuzzColumn
@R_DrawFuzzColumn
        movem.l d4/d6-d7/a2/a3,-(sp)

        move.l  _viewheight(a4),d1
        subq.l  #1,d1
        move.l  _dc_yh(a4),d7           ; count = _dc_yh - _dc_yl
        cmp.l   d1,d7
        bne.b   .skip_yh5
        subq.l  #1,d1
        move.l  d1,d7
.skip_yh5
        move.l  _dc_yl(a4),d0
        bne.b   .skip_yl5
        moveq   #1,d0
.skip_yl5
        sub.l   d0,d7
        bmi.w   .end5

        move.l  _dc_x(a4),d1            ; dest = ylookup[_dc_yl] + columnofs[_dc_x]
        lea     _ylookup(a4),a0
        move.l  (a0,d0.l*4),a0
        lea     _columnofs(a4),a1
        add.l   (a1,d1.l*4),a0

        move.l  _colormaps(a4),d4
        add.l   #6*256,d4

        lea     _fuzzoffset(a4),a1
        move.l  _fuzzpos(a4),d0
.pos_loop5
        sub.w   #200,d0
        bpl     .pos_loop5
        add.w   #200,d0
        add.l   d0,a1

        lea     (SCREENWIDTH*4).w,a3

; d7: cnt >> 2
; a0: chunky
; a1: fuzzoffset
; d4: light table aligned to 256 byte boundary
; a3: SCREENWIDTH*4

        move.l  d7,d6
        and.w   #3,d6

        add.w   .width_tab5(pc,d6.w*2),a0
        lsr.w   #2,d7
        move.w  .tmap_tab5(pc,d6.w*2),d6

        jmp     .loop5(pc,d6.w)

        cnop    0,4
.width_tab5
        dc.w    -3*SCREENWIDTH
        dc.w    -2*SCREENWIDTH
        dc.w    -1*SCREENWIDTH
        dc.w    0

.tmap_tab5
        dc.w    .05-.loop5
        dc.w    .15-.loop5
        dc.w    .25-.loop5
        dc.w    .35-.loop5

.loop5
.35
        move.l  a0,a2                   ; This is essentially
        add.l   (a1)+,a2                ; just moving memory around.
        move.b  (a2),d4
        move.l  d4,a2                   ; Not 060 optimized but
        move.b  (a2),(a0)               ; if you have hordes of
.25
        lea     SCREENWIDTH(a0),a2      ; invisible monsters which
        add.l   (a1)+,a2                ; slow down the game too much,
        move.b  (a2),d4                 ; do tell me.
        move.l  d4,a2
        move.b  (a2),SCREENWIDTH(a0)
.15
        lea     2*SCREENWIDTH(a0),a2
        add.l   (a1)+,a2
        move.b  (a2),d4
        move.l  d4,a2
        move.b  (a2),2*SCREENWIDTH(a0)
.05
        lea     3*SCREENWIDTH(a0),a2
        add.l   (a1)+,a2
        move.b  (a2),d4
        move.l  d4,a2
        move.b  (a2),3*SCREENWIDTH(a0)

        add.l   a3,a0
.loop_end5
        dbf     d7,.loop5
        sub.l   #_fuzzoffset,a1
        move.l  a1,_fuzzpos
.end5
        movem.l (sp)+,d4/d6-d7/a2/a3
        rts

;-----------------------------------------------------------------------
; R_DrawTranslatedColumn - high detail column drawn through the
; dc_translation table and then dc_colormap, one byte per pixel.
; In:    a4 = base for the a4-relative dc_* / ylookup / columnofs data
; Saves: d2-d4/d6-d7/a2/a3.
; Not exported: the matching xdef lines above are commented out.
        cnop    0,4

_R_DrawTranslatedColumn                 ; no 060 version :(
@R_DrawTranslatedColumn
        movem.l d2-d4/d6-d7/a2/a3,-(sp)

        move.l  _dc_yh(a4),d7           ; count = _dc_yh - _dc_yl
        move.l  _dc_yl(a4),d0
        sub.l   d0,d7
        bmi.w   .end6

        move.l  _dc_x(a4),d1            ; dest = ylookup[_dc_yl] + columnofs[_dc_x]
        lea     _ylookup(a4),a0
        move.l  (a0,d0.l*4),a0
        lea     _columnofs(a4),a1
        add.l   (a1,d1.l*4),a0

        move.l  _dc_translation(a4),d2
        move.l  _dc_colormap(a4),d4
        move.l  _dc_source(a4),a1

        move.l  _dc_iscale(a4),d1       ; frac = _dc_texturemid + (_dc_yl-centery)*fracstep
        sub.l   _centery(a4),d0
        muls.l  d1,d0
        add.l   _dc_texturemid(a4),d0

        moveq   #$7f,d3
        lea     (SCREENWIDTH*4).w,a3

; d7: cnt >> 2
; a0: chunky
; a1: texture
; d0: frac  (uuuu uuuu uuuu uuuu 0000 0000 0UUU UUUU)
; d1: dfrac (.......................................)
; d3: $7f
; d4: light table aligned to 256 byte boundary
; d2: translation table aligned to 256 byte boundary
; a3: SCREENWIDTH*4

        move.l  d7,d6
        and.w   #3,d6

        swap    d0                      ; swap decimals and fraction
        swap    d1

        add.w   .width_tab6(pc,d6.w*2),a0
        lsr.w   #2,d7
        move.w  .tmap_tab6(pc,d6.w*2),d6

        and.w   d3,d0
        sub.w   d1,d0
        add.l   d1,d0                   ; setup the X flag

        jmp     .loop6(pc,d6.w)

        cnop    0,4
.width_tab6
        dc.w    -3*SCREENWIDTH
        dc.w    -2*SCREENWIDTH
        dc.w    -1*SCREENWIDTH
        dc.w    0

.tmap_tab6
        dc.w    .06-.loop6
        dc.w    .16-.loop6
        dc.w    .26-.loop6
        dc.w    .36-.loop6

.loop6
.36
        move.b  (a1,d0.w),d2
        move.l  d2,a2
        addx.l  d1,d0
        move.b  (a2),d4
        and.w   d3,d0
        move.l  d4,a2
        move.b  (a2),(a0)
.26
        move.b  (a1,d0.w),d2
        move.l  d2,a2
        addx.l  d1,d0
        move.b  (a2),d4
        and.w   d3,d0
        move.l  d4,a2
        move.b  (a2),SCREENWIDTH(a0)
.16
        move.b  (a1,d0.w),d2
        move.l  d2,a2
        addx.l  d1,d0
        move.b  (a2),d4
        and.w   d3,d0
        move.l  d4,a2
        move.b  (a2),SCREENWIDTH*2(a0)
.06
        move.b  (a1,d0.w),d2
        move.l  d2,a2
        addx.l  d1,d0
        move.b  (a2),d4
        and.w   d3,d0
        move.l  d4,a2
        move.b  (a2),SCREENWIDTH*3(a0)

        add.l   a3,a0
.loop_end6
        dbf     d7,.loop6
.end6
        movem.l (sp)+,d2-d4/d6-d7/a2/a3
        rts

;-----------------------------------------------------------------------
        cnop    0,4

; routine from j.selck@flensburg.netsurf.de (Aki's 040 routine is faster)

;_R_DrawColumn_030
;@R_DrawColumn_030
;       movem.l d3-d7/a2-a5,-(sp)
;       move.l  _dc_yl(a4),d0
;       move.l  _dc_yh(a4),d7
;       sub.l   d0,d7
;       bmi.b   1$
;       move.l  _dc_x(a4),d1
;       lea     _columnofs(a4),a5
;       lea     (a5,d1.l*4),a1
;       lea     _ylookup(a4),a5
;       movea.l (a5,d0.l*4),a2
;       adda.l  (a1),a2
;       move.l  _dc_iscale(a4),d6
;       sub.l   _centery(a4),d0
;       muls.l  d6,d0
;       move.l  _dc_texturemid(a4),d5
;       add.l   d0,d5
;       movea.l _dc_source(a4),a3
;       movea.l _dc_colormap(a4),a4
;       moveq   #127,d4
;       move.l  #SCREENWIDTH,d3
;       moveq   #0,d1           ; ensure high bits of d1 are clear
;       add.w   d6,d5           ; frac += fracstep (also sets X flag)
;       swap    d5              ; swap(frac)
;       swap    d6              ; swap(fracstep)
;       and.w   d4,d5           ; (frac>>16)&127
;2$     move.b  (a3,d5.w),d1    ; dc_source[(frac>>FRACBITS)&127]
;       move.b  (a4,d1.w),(a2)  ; *dest = dc_colormap[d1]
;       addx.l  d6,d5           ; swap(frac += fracstep), use & set X
;       adda.l  d3,a2           ; dest += SCREENWIDTH
;       and.w   d4,d5           ; (frac>>16)&127
;       dbra    d7,2$
;1$     movem.l (sp)+,d3-d7/a2-a5
;       rts

;-----------------------------------------------------------------------
; R_DrawColumn_060 - high detail textured column, scheduled for 68060.
; Globals are read with absolute addressing, not through a4.
; Saves: d2-d3/d5-d7/a2/a3.
; (count & 3) + 1 pixels are drawn one at a time first; the rest is
; drawn four per iteration with two interleaved fraction accumulators
; (d0 = frac, d2 = frac + dfrac, both stepped by 2*dfrac).
        cnop    0,4

_R_DrawColumn_060
@R_DrawColumn_060
        movem.l d2-d3/d5-d7/a2/a3,-(sp)

        move.l  (_dc_yh),d7             ; count = _dc_yh - _dc_yl
        move.l  (_dc_yl),d0
        sub.l   d0,d7
        bmi.w   .end7

        move.l  (_dc_x),d1              ; dest = ylookup[_dc_yl] + columnofs[_dc_x]
        lea     (_ylookup),a0
        move.l  (a0,d0.l*4),a0
        lea     (_columnofs),a1
        add.l   (a1,d1.l*4),a0

        move.l  (_dc_colormap),a2
        move.l  (_dc_source),a1

        move.l  (_dc_iscale),d1         ; frac = _dc_texturemid + (_dc_yl-centery)*fracstep
        sub.l   (_centery),d0
        muls.l  d1,d0
        add.l   (_dc_texturemid),d0

        moveq   #$7f,d3
        move.l  #SCREENWIDTH,a3

        move.l  d7,d6                   ; Do the leftover iterations in
        and.w   #3,d6                   ; this loop.
        addq.w  #1,d6
.skip_loop7
        move.l  d0,d5
        swap    d5
        and.l   d3,d5
        move.b  (a1,d5.w),d5
        add.l   d1,d0
        move.b  (a2,d5.w),(a0)
        add.l   a3,a0
        subq.w  #1,d6
        bne.b   .skip_loop7

; d7: cnt >> 2
; a0: chunky
; a1: texture
; a2: light_table
; d0: frac  (uuuu uuuu uuuu uuuu 0000 0000 0UUU UUUU)
; d1: dfrac*2   (.......................................)
; d2: frac+dfrac(.......................................)
; d3: $7f
; a3: SCREENWIDTH (scaled to SCREENWIDTH*4 below)

.skip7
        lsr.l   #2,d7
        subq.l  #1,d7
        bmi.b   .end7

        add.l   a3,a3
        move.l  d0,d2
        add.l   a3,a3
        add.l   d1,d2
        add.l   d1,d1

        eor.w   d0,d2                   ; swap the fraction part for addx
        eor.w   d2,d0                   ; assuming 16.16 fixed point
        eor.w   d0,d2

        swap    d0                      ; swap decimals and fraction
        swap    d1
        swap    d2

        moveq   #0,d5
        and.w   d3,d2
        and.w   d3,d0

        sub.w   d1,d0
        add.l   d1,d0                   ; setup the X flag

        move.b  (a1,d2.w),d5
.loop7
        ; This should be reasonably scheduled for
        ; m68060. It should perform well on other processors
        ; too. That AGU stall still bothers me though.

        move.b  (a1,d0.w),d6            ; stall + pOEP but allows sOEP
        addx.l  d1,d2                   ; pOEP only
        move.b  (a2,d5.l),d5            ; pOEP but allows sOEP
        and.w   d3,d2                   ; sOEP
        move.b  (a2,d6.l),d6            ; pOEP but allows sOEP
        move.b  d5,SCREENWIDTH(a0)      ; sOEP
        addx.l  d1,d0                   ; pOEP only
        move.b  (a1,d2.w),d5            ; pOEP but allows sOEP
        and.w   d3,d0                   ; sOEP
        move.b  d6,(a0)                 ; pOEP
                                        ; = ~4 cycles/pixel
                                        ; + cache misses

        ; The vertical writes are the true timehog of the loop
        ; because of the characteristics of the copyback cache
        ; operation.

        ; Better mark the chunky buffer as write through
        ; with the MMU and have all the horizontal writes
        ; be longs aligned to longword boundary.

        move.b  (a1,d0.w),d6
        addx.l  d1,d2
        move.b  (a2,d5.l),d5
        and.w   d3,d2
        move.b  (a2,d6.l),d6
        move.b  d5,SCREENWIDTH*3(a0)
        addx.l  d1,d0
        move.b  (a1,d2.w),d5
        and.w   d3,d0
        move.b  d6,SCREENWIDTH*2(a0)

        add.l   a3,a0
.loop_end7
        dbf     d7,.loop7

        ; it's faster to divide it to two lines on 060
        ; and shouldn't be slower on 040.

;       move.b  (a1,d0.w),d6            ; new
;       move.b  (a2,d6.l),d6            ; new
;       move.b  d6,(a0)                 ; new

.end7
        movem.l (sp)+,d2-d3/d5-d7/a2/a3
        rts

;-----------------------------------------------------------------------
; R_DrawColumn_040 - high detail textured column, one byte per pixel.
; In:    a4 = base for the a4-relative dc_* / ylookup / columnofs data
; Saves: d3-d4/d6-d7/a2/a3.
; Same four-way unrolled scheme as R_DrawColumnLow.
        cnop    0,4

; 040 version

_R_DrawColumn_040
@R_DrawColumn_040
        movem.l d3-d4/d6-d7/a2/a3,-(sp)

        move.l  _dc_yh(a4),d7           ; count = _dc_yh - _dc_yl
        move.l  _dc_yl(a4),d0
        sub.l   d0,d7
        bmi.w   .end8

        move.l  _dc_x(a4),d1            ; dest = ylookup[_dc_yl] + columnofs[_dc_x]
        lea     _ylookup(a4),a0
        move.l  (a0,d0.l*4),a0
        lea     _columnofs(a4),a1
        add.l   (a1,d1.l*4),a0

        move.l  _dc_colormap(a4),d4
        move.l  _dc_source(a4),a1

        move.l  _dc_iscale(a4),d1       ; frac = _dc_texturemid + (_dc_yl-centery)*fracstep
        sub.l   _centery(a4),d0
        muls.l  d1,d0
        add.l   _dc_texturemid(a4),d0

        moveq   #$7f,d3
        lea     (SCREENWIDTH*4).w,a3

; d7: cnt >> 2
; a0: chunky
; a1: texture
; d0: frac  (uuuu uuuu uuuu uuuu 0000 0000 0UUU UUUU)
; d1: dfrac (.......................................)
; d3: $7f
; d4: light table aligned to 256 byte boundary
; a3: SCREENWIDTH*4

        move.l  d7,d6
        and.w   #3,d6

        swap    d0                      ; swap decimals and fraction
        swap    d1

        add.w   .width_tab8(pc,d6.w*2),a0
        lsr.w   #2,d7
        move.w  .tmap_tab8(pc,d6.w*2),d6

        and.w   d3,d0
        sub.w   d1,d0
        add.l   d1,d0                   ; setup the X flag

        jmp     .loop8(pc,d6.w)

        cnop    0,4
.width_tab8
        dc.w    -3*SCREENWIDTH
        dc.w    -2*SCREENWIDTH
        dc.w    -1*SCREENWIDTH
        dc.w    0

.tmap_tab8
        dc.w    .08-.loop8
        dc.w    .18-.loop8
        dc.w    .28-.loop8
        dc.w    .38-.loop8

.loop8
.38
        move.b  (a1,d0.w),d4
        addx.l  d1,d0
        move.l  d4,a2
        and.w   d3,d0
        move.b  (a2),(a0)
.28
        move.b  (a1,d0.w),d4
        addx.l  d1,d0
        move.l  d4,a2
        and.w   d3,d0
        move.b  (a2),SCREENWIDTH(a0)
.18
        move.b  (a1,d0.w),d4
        addx.l  d1,d0
        move.l  d4,a2
        and.w   d3,d0
        move.b  (a2),SCREENWIDTH*2(a0)
.08
        move.b  (a1,d0.w),d4
        addx.l  d1,d0
        move.l  d4,a2
        and.w   d3,d0
        move.b  (a2),SCREENWIDTH*3(a0)

        add.l   a3,a0
.loop_end8
        dbf     d7,.loop8
.end8
        movem.l (sp)+,d3-d4/d6-d7/a2/a3
        rts

;-----------------------------------------------------------------------
; This faster version by Aki M Laukkanen <amlaukka@cc.helsinki.fi>
;
; R_DrawSpan_060 - high detail horizontal span, scheduled for 68060.
; Globals are read with absolute addressing, not through a4.
; Saves: d2-d7/a2/a3.
; Leading pixels are drawn singly / as a word until the destination is
; longword aligned, then four pixels per iteration are assembled in d4
; and stored as one long; the remaining (count & 3) pixels are drawn
; by .loop39.
        cnop    0,4

_R_DrawSpan_060
@R_DrawSpan_060
        movem.l d2-d7/a2/a3,-(sp)

        move.l  (_ds_y),d0
        move.l  (_ds_x1),d1             ; dest = ylookup[_ds_y] + columnofs[_ds_x1]
        lea     (_ylookup),a0
        move.l  (a0,d0.l*4),a0
        lea     (_columnofs),a1
        add.l   (a1,d1.l*4),a0

        move.l  (_ds_source),a1
        move.l  (_ds_colormap),a2

        move.l  (_ds_x2),d7             ; count = _ds_x2 - _ds_x1
        sub.l   d1,d7
        addq.l  #1,d7

        move.l  (_ds_xfrac),d0
        move.l  (_ds_yfrac),d1
        move.l  (_ds_xstep),d2
        move.l  (_ds_ystep),d3

        move.l  a0,d4
        btst    #0,d4
        beq.b   .skipb9

        move.l  d0,d5                   ; do the unaligned pixels
        move.l  d1,d6                   ; so we can write to longword
        swap    d5                      ; boundary in the main loop
        swap    d6
        and.w   #$3f,d5
        and.w   #$3f,d6
        lsl.w   #6,d6
        or.w    d5,d6
        move.b  (a1,d6.w),d5
        add.l   d2,d0
        move.b  (a2,d5.w),(a0)+
        add.l   d3,d1
        move.l  a0,d4
        subq.l  #1,d7
.skipb9
        btst    #1,d4
        beq.b   .skips9
        moveq   #2,d4
        cmp.l   d4,d7
        bls.b   .skips9

        move.l  d0,d5                   ; write two pixels
        move.l  d1,d6
        swap    d5
        swap    d6
        and.w   #$3f,d5
        and.w   #$3f,d6
        lsl.w   #6,d6
        or.w    d5,d6
        move.b  (a1,d6.w),d5
        move.w  (a2,d5.w),d4
        add.l   d2,d0
        add.l   d3,d1
        move.l  d0,d5
        move.l  d1,d6
        swap    d5
        swap    d6
        and.w   #$3f,d5
        and.w   #$3f,d6
        lsl.w   #6,d6
        or.w    d5,d6
        move.b  (a1,d6.w),d5
        move.b  (a2,d5.w),d4
        add.l   d2,d0
        move.w  d4,(a0)+
        add.l   d3,d1
        subq.l  #2,d7
.skips9
        move.l  d7,d6                   ; setup registers
        and.w   #3,d6
        move.l  d6,a3                   ; a3 = leftover pixel count

        eor.w   d0,d1                   ; swap fraction parts for addx
        eor.w   d2,d3
        eor.w   d1,d0
        eor.w   d3,d2
        eor.w   d0,d1
        eor.w   d2,d3
        swap    d0
        swap    d1
        swap    d2
        swap    d3
        lsl.w   #6,d1
        lsl.w   #6,d3
        moveq   #0,d6
        moveq   #0,d5
        sub.l   #$f000,a1               ; index words below are $f000..$ffff

        lsr.l   #2,d7
        beq.w   .skip_loop29
        subq.l  #1,d7

        sub.w   d3,d1
        add.l   d3,d1                   ; setup the X flag

        or.w    #$ffc0,d0
        or.w    #$f03f,d1
        move.w  d0,d6
        and.w   d1,d6
        bra.b   .start_loop29

        cnop    0,8
.loop29
        or.w    #$ffc0,d0               ; pOEP
        or.w    #$f03f,d1               ; sOEP
        move.b  (a2,d5.l),d4            ; pOEP but allows sOEP
        move.w  d0,d6                   ; sOEP
        and.w   d1,d6                   ; pOEP
        move.l  d4,(a0)+                ; sOEP
.start_loop29
        addx.l  d2,d0                   ; pOEP only
        addx.l  d3,d1                   ; pOEP only
        move.b  (a1,d6.l),d5            ; pOEP but allows sOEP
        or.w    #$ffc0,d0               ; sOEP
        or.w    #$f03f,d1               ; pOEP
        move.w  d0,d6                   ; sOEP
        move.w  (a2,d5.l),d4            ; pOEP but allows sOEP
        and.w   d1,d6                   ; sOEP
        addx.l  d2,d0                   ; pOEP only
        addx.l  d3,d1                   ; pOEP only
        move.b  (a1,d6.l),d5            ; pOEP but allows sOEP
        or.w    #$ffc0,d0               ; sOEP
        or.w    #$f03f,d1               ; pOEP
        move.w  d0,d6                   ; sOEP
        move.b  (a2,d5.l),d4            ; pOEP but allows sOEP
        and.w   d1,d6                   ; sOEP
        addx.l  d2,d0                   ; pOEP only
        addx.l  d3,d1                   ; pOEP only
        move.b  (a1,d6.l),d5            ; pOEP but allows sOEP
        or.w    #$ffc0,d0               ; sOEP
        or.w    #$f03f,d1               ; pOEP
        move.w  d0,d6                   ; sOEP
        swap    d4                      ; pOEP only
        move.w  (a2,d5.l),d4            ; pOEP but allows sOEP
        and.w   d1,d6                   ; sOEP
        addx.l  d2,d0                   ; pOEP only
        addx.l  d3,d1                   ; pOEP only
        move.b  (a1,d6.l),d5            ; pOEP but allows sOEP
        dbf     d7,.loop29              ; pOEP only = 7.75 cycles/pixel

        move.b  (a2,d5.l),d4
        move.l  d4,(a0)+
        ; X still holds the carry of the last addx.l d3,d1 (move and
        ; dbf leave X alone), so the tail loop must not prime again:
        ; a second priming would step the fraction in d1 twice.
        bra.b   .tail29
.skip_loop29
        sub.w   d3,d1                   ; main loop did not run:
        add.l   d3,d1                   ; setup the X flag here
.tail29
        move.l  a3,d7                   ; move to Dn does not change X
        bra.b   .loop_end29
.loop39
        or.w    #$ffc0,d0
        or.w    #$f03f,d1
        move.w  d0,d6
        and.w   d1,d6
        addx.l  d2,d0
        addx.l  d3,d1
        move.b  (a1,d6.l),d5
        move.b  (a2,d5.l),(a0)+
.loop_end29
        dbf     d7,.loop39
.end29
        movem.l (sp)+,d2-d7/a2/a3
        rts

        cnop    0,4
;-----------------------------------------------------------------------
; 030/040 version
;
; R_DrawSpan_040 - high detail horizontal span.
; In:    a4 = base for the a4-relative ds_* / ylookup / columnofs data
; Saves: d2-d7/a2-a4 (a4 is reused as an end pointer once all globals
;        have been read).
; Leading pixels are drawn singly / as a word until the destination is
; longword aligned, .loop20 then writes four pixels per long, and
; .loop30 draws whatever is left one byte at a time.

_R_DrawSpan_040
@R_DrawSpan_040
        movem.l d2-d7/a2-a4,-(sp)

        move.l  _ds_y(a4),d0
        move.l  _ds_x1(a4),d1           ; dest = ylookup[_ds_y] + columnofs[_ds_x1]
        lea     _ylookup(a4),a0
        move.l  (a0,d0.l*4),a0
        lea     _columnofs(a4),a1
        add.l   (a1,d1.l*4),a0

        move.l  _ds_source(a4),a1
        move.l  _ds_colormap(a4),a2

        move.l  _ds_x2(a4),d7           ; count = _ds_x2 - _ds_x1
        sub.l   d1,d7
        addq.l  #1,d7

        move.l  _ds_xfrac(a4),d0
        move.l  _ds_yfrac(a4),d1
        move.l  _ds_xstep(a4),d2
        move.l  _ds_ystep(a4),d3

        move.l  a0,d4
        btst    #0,d4
        beq.b   .skipb0

        move.l  d0,d5                   ; do the unaligned pixels
        move.l  d1,d6                   ; so we can write to longword
        swap    d5                      ; boundary in the main loop
        swap    d6
        and.w   #$3f,d5
        and.w   #$3f,d6
        lsl.w   #6,d6
        or.w    d5,d6
        move.b  (a1,d6.w),d5
        add.l   d2,d0
        move.b  (a2,d5.w),(a0)+
        add.l   d3,d1
        move.l  a0,d4
        subq.l  #1,d7
.skipb0
        btst    #1,d4
        beq.b   .skips0
        moveq   #2,d4
        cmp.l   d4,d7
        bls.b   .skips0

        move.l  d0,d5                   ; write two pixels
        move.l  d1,d6
        swap    d5
        swap    d6
        and.w   #$3f,d5
        and.w   #$3f,d6
        lsl.w   #6,d6
        or.w    d5,d6
        move.b  (a1,d6.w),d5
        move.w  (a2,d5.w),d4
        add.l   d2,d0
        add.l   d3,d1
        move.l  d0,d5
        move.l  d1,d6
        swap    d5
        swap    d6
        and.w   #$3f,d5
        and.w   #$3f,d6
        lsl.w   #6,d6
        or.w    d5,d6
        move.b  (a1,d6.w),d5
        move.b  (a2,d5.w),d4
        add.l   d2,d0
        move.w  d4,(a0)+
        add.l   d3,d1
        subq.l  #2,d7
.skips0
        move.l  a2,d4
        add.l   #$1000,a1               ; catch 22
        move.l  a0,a3
        add.l   d7,a3                   ; a3 = end of span
        move.l  d7,d5
        and.b   #~3,d5
        move.l  a0,a4
        add.l   d5,a4                   ; a4 = end of the longword main loop

        eor.w   d0,d1                   ; swap fraction parts for addx
        eor.w   d2,d3
        eor.w   d1,d0
        eor.w   d3,d2
        eor.w   d0,d1
        eor.w   d2,d3
        swap    d0
        swap    d1
        swap    d2
        swap    d3
        lsl.w   #6,d1
        lsl.w   #6,d3
        move.w  #$ffc0,d6
        move.w  #$f03f,d7

        lsr.w   #2,d5
        beq.b   .skip_loop20
        sub.w   d2,d0
        add.l   d2,d0                   ; setup the X flag
.loop20
        or.w    d6,d0
        or.w    d7,d1
        and.w   d1,d0
        addx.l  d3,d1
        move.b  (a1,d0.w),d4
        addx.l  d2,d0
        move.l  d4,a2
        move.w  (a2),d5
        or.w    d6,d0
        or.w    d7,d1
        and.w   d1,d0
        addx.l  d3,d1
        move.b  (a1,d0.w),d4
        addx.l  d2,d0
        move.l  d4,a2
        move.b  (a2),d5
        swap    d5
        or.w    d6,d0
        or.w    d7,d1
        and.w   d1,d0
        addx.l  d3,d1
        move.b  (a1,d0.w),d4
        addx.l  d2,d0
        move.l  d4,a2
        move.w  (a2),d5
        or.w    d6,d0
        or.w    d7,d1
        and.w   d1,d0
        addx.l  d3,d1
        move.b  (a1,d0.w),d4
        addx.l  d2,d0
        move.l  d4,a2
        move.b  (a2),d5
        move.l  d5,(a0)+
        cmp.l   a0,a4
        bne.b   .loop20
        ; X still holds the carry of the last addx.l d2,d0 (move and
        ; cmpa leave X alone), so the tail loop must not prime again:
        ; a second priming would step the fraction in d0 twice.
        bra.b   .loop_end20
.skip_loop20
        sub.w   d2,d0                   ; main loop did not run:
        add.l   d2,d0                   ; setup the X flag here
        bra.b   .loop_end20
.loop30
        or.w    d6,d0
        or.w    d7,d1
        and.w   d1,d0
        addx.l  d3,d1
        move.b  (a1,d0.w),d4
        addx.l  d2,d0
        move.l  d4,a2
        move.b  (a2),(a0)+
.loop_end20
        cmp.l   a0,a3
        bne.b   .loop30
.end20
        movem.l (sp)+,d2-d7/a2-a4
        rts

;-----------------------------------------------------------------------

        xref    _segtextured
        xref    _markfloor
        xref    _markceiling
        xref    _maskedtexture
        xref    _maskedtexturecol
        xref    _toptexture
        xref    _bottomtexture
        xref    _midtexture
        xref    _rw_x
        xref    _rw_stopx
        xref    _rw_centerangle
        xref    _rw_offset
        xref    _rw_distance
        xref    _rw_scale
        xref    _rw_scalestep
        xref    _rw_midtexturemid
        xref    _rw_toptexturemid
        xref    _rw_bottomtexturemid
        xref    _pixhigh
        xref    _pixlow
        xref    _pixhighstep
        xref    _pixlowstep
        xref    _topfrac
        xref    _topstep
        xref    _bottomfrac
        xref    _bottomstep
        xref    _walllights
        xref    _ceilingclip
        xref    _ceilingplane
        xref    _floorclip
        xref    _floorplane
        xref    _xtoviewangle
        xref    _finetangent
        xref    _FixedMul
        xref    _colfunc
        xref    @R_GetColumn

;-----------------------------------------------------------------------
; R_RenderSegLoop - per-column wall loop for rw_x .. rw_stopx-1.
; In:    a4 = base for the a4-relative globals
; Saves: d2-d7/a2-a6.
; Loop-carried values are kept in address registers and written back
; to their globals on exit:
;   a2 = rw_x, a3 = topfrac, a5 = bottomfrac, a6 = rw_scale
; Values live across the calls made in the loop body:
;   d7 = yl, d3 = yh, d5 = texturecolumn, d6 = mid
; Calls FixedMul and colfunc through the pointers stored in those
; variables, and R_GetColumn directly (arguments in d0/d1, result d0).
        cnop    0,4

_R_RenderSegLoop
@R_RenderSegLoop
        movem.l d2-d7/a2-a6,-(sp)
        movea.l _rw_x(a4),a2            ; a2 = rw_x
        movea.l _topfrac(a4),a3         ; a3 = topfrac
        movea.l _bottomfrac(a4),a5      ; a5 = bottomfrac
        movea.l _rw_scale(a4),a6        ; a6 = rw_scale
        bra.w   1$                      ; for ( ; rw_x < rw_stopx ; rw_x++)

20$     move.l  a2,d0                   ; d0 = rw_x
        move.l  a3,d7                   ; d7 = topfrac
        lea     _ceilingclip(a4),a0     ; a0 -> ceilingclip
        subq.l  #1,d7                   ; d7 = topfrac - 1
        move.w  (a0,d0.l*2),d3          ; d3.w = ceilingclip[rw_x]
        asr.l   #8,d7                   ; d7 = (topfrac - 1) >> 8
        ext.l   d3                      ; d3 = ceilingclip[rw_x]
        asr.l   #4,d7                   ; d7 = (topfrac - 1) >> 12
        addq.l  #1,d3                   ; d3 = top = ceilingclip[rw_x] + 1
        addq.l  #1,d7                   ; d7 = yl = (topfrac + (1 << 12) - 1) >> 12
        cmp.l   d3,d7
        bge     2$
        move.l  d3,d7                   ; d7 = yl = ceilingclip[rw_x] + 1

2$      tst.l   _markceiling(a4)        ; if (markceiling) {
        beq.b   3$
        lea     _floorclip(a4),a1       ; a1 -> floorclip
        move.l  d7,d4                   ; d4 = yl
        move.w  (a1,d0.l*2),d1          ; d1.w = floorclip[rw_x]
        subq.l  #1,d4                   ; d4 = bottom = yl - 1
        ext.l   d1                      ; d1 = floorclip[rw_x]
        cmp.l   d1,d4                   ; if (bottom >= floorclip[rw_x])
        blt.b   4$
        move.l  d1,d4
        subq.l  #1,d4                   ; d4 = bottom = floorclip[rw_x] - 1
4$      cmp.l   d4,d3                   ; if (top <= bottom)
        bgt.b   3$
        movea.l _ceilingplane(a4),a1
        adda.l  d0,a1                   ; a1 -> ceilingplane->0[rw_x]
        move.b  d3,$15(a1)              ; ceilingplane->top[rw_x] = top
        move.b  d4,$157(a1)             ; ceilingplane->bottom[rw_x] = bottom

3$      move.l  a5,d3                   ; d3 = bottomfrac
        lea     _floorclip(a4),a1       ; a1 -> floorclip
        asr.l   #8,d3                   ; d3 = bottomfrac >> 8
        move.w  (a1,d0.l*2),d1          ; d1.w = floorclip[rw_x]
        asr.l   #4,d3                   ; d3 = yh = bottomfrac >> 12
        ext.l   d1                      ; d1 = floorclip[rw_x]
        cmp.l   d1,d3                   ; if (yh >= floorclip[rw_x])
        blt.b   5$
        move.l  d1,d3
        subq.l  #1,d3                   ; d3 = yh = floorclip[rw_x] - 1

5$      tst.l   _markfloor(a4)          ; if (markfloor)
        beq.b   6$
        move.l  d3,d4                   ; d4 = yh
        move.w  (a0,d0.l*2),d2          ; d2.w = ceilingclip[rw_x]
        addq.l  #1,d4                   ; d4 = top = yh + 1
        ext.l   d2                      ; d2 = ceilingclip[rw_x]
        subq.l  #1,d1                   ; d1 = bottom = floorclip[rw_x] - 1
        cmp.l   d2,d4                   ; if (top <= ceilingclip[rw_x])
        bgt.b   7$
        move.l  d2,d4
        addq.l  #1,d4                   ; d4 = top = ceilingclip[rw_x] + 1
7$      cmp.l   d1,d4                   ; if (top <= bottom)
        bgt.b   6$
        movea.l _floorplane(a4),a1
        adda.l  d0,a1                   ; a1 -> floorplane->0[rw_x]
        move.b  d4,$15(a1)              ; floorplane->top[rw_x] = top
        move.b  d1,$157(a1)             ; floorplane->bottom[rw_x] = bottom

6$      tst.l   _segtextured(a4)        ; if (segtextured)
        beq.b   8$
        lea     _xtoviewangle(a4),a0    ; a0 -> xtoviewangle
        move.l  _rw_centerangle(a4),d1
        add.l   (a0,d0.l*4),d1          ; d1 = rw_centerangle + xtoviewangle[rw_x]
        swap    d1                      ; d1 = angle
        lea     (_finetangent),a0       ; a0 -> finetangent
        lsr.w   #3,d1
        move.l  (a0,d1.w*4),d0          ; d0 = finetangent[angle]
        movea.l _FixedMul(a4),a0
        move.l  _rw_distance(a4),d1
        jsr     (a0)                    ; d0 = FixedMul(finetangent[angle],rw_distance)
        move.l  _rw_offset(a4),d5
        move.l  a6,d4                   ; d4 = rw_scale
        sub.l   d0,d5                   ; d5 = rw_offset-FixedMul(finetangent[angle],rw_distance)
        asr.l   #8,d4
        swap    d5                      ; d5.w = texturecolumn >>= 16
        asr.l   #4,d4                   ; d4 = index = rw_scale >> 12
        ext.l   d5                      ; d5 = texturecolumn
        moveq   #$30,d2                 ; d2 = MAXLIGHTSCALE = $30
        cmp.l   d2,d4                   ; if (index >= MAXLIGHTSCALE)
        bcs.b   9$
        moveq   #$2f,d4                 ; d4 = index = MAXLIGHTSCALE - 1
9$      movea.l _walllights(a4),a0
        moveq   #-1,d0                  ; d0 = $ffffffff
        move.l  (a0,d4.l*4),_dc_colormap(a4) ; dc_colormap = walllights[index]
        move.l  a2,_dc_x(a4)            ; dc_x = rw_x
        move.l  a6,d1                   ; d1 = rw_scale
        divu.l  d1,d0
        move.l  d0,_dc_iscale(a4)       ; dc_iscale = $ffffffff / rw_scale

8$      move.l  _midtexture(a4),d0      ; if (midtexture)
        beq.b   10$
        move.l  d7,_dc_yl(a4)           ; dc_yl = yl
        move.l  d3,_dc_yh(a4)           ; dc_yh = yh
        move.l  _rw_midtexturemid(a4),_dc_texturemid(a4)
        move.l  d5,d1                   ; d1 = texturecolumn
        jsr     (@R_GetColumn)
        move.l  d0,_dc_source(a4)       ; dc_source = R_GetColumn(midtexture,texturecolumn)
        movea.l _colfunc(a4),a0
        jsr     (a0)                    ; colfunc()
        move.l  a2,d0                   ; d0 = rw_x
        move.l  _viewheight(a4),d1      ; d1 = viewheight
        lea     _ceilingclip(a4),a0
        move.w  d1,(a0,d0.l*2)          ; ceilingclip[rw_x] = viewheight
        lea     _floorclip(a4),a0
        move.w  #$ffff,(a0,d0.l*2)      ; floorclip[rw_x] = -1
        bra.w   11$

10$     move.l  _toptexture(a4),d0      ; if (toptexture)
        beq.b   12$
        move.l  _pixhighstep(a4),d1     ; d1 = pixhighstep
        move.l  _pixhigh(a4),d2         ; d2 = pixhigh
        add.l   d1,_pixhigh(a4)         ; pixhigh += pixhighstep
        lea     _floorclip(a4),a0       ; a0 -> floorclip
        asr.l   #8,d2                   ; d2 = pixhigh >> 8
        move.l  a2,d1                   ; d1 = rw_x
        move.w  (a0,d1.l*2),d1          ; d1.w = floorclip[rw_x]
        asr.l   #4,d2                   ; d2 = pixhigh >> 12
        ext.l   d1                      ; d1 = floorclip[rw_x]
        move.l  d2,d6                   ; d6 = mid = pixhigh >> 12
        cmp.l   d1,d6                   ; if (mid >= floorclip[rw_x])
        blt.b   13$
        move.l  d1,d6
        subq.l  #1,d6                   ; d6 = mid = floorclip[rw_x] - 1
13$     cmp.l   d7,d6                   ; if (mid >= yl)
        blt.b   14$
        move.l  d7,_dc_yl(a4)           ; dc_yl = yl
        move.l  d6,_dc_yh(a4)           ; dc_yh = mid
        move.l  _rw_toptexturemid(a4),_dc_texturemid(a4)
        move.l  d5,d1                   ; d1 = texturecolumn, d0 = toptexture
        jsr     (@R_GetColumn)
        move.l  d0,_dc_source(a4)       ; dc_source = R_GetColumn(d0,d1)
        movea.l _colfunc(a4),a0
        jsr     (a0)                    ; colfunc()
        move.l  a2,d0                   ; d0 = rw_x
        lea     _ceilingclip(a4),a0
        move.w  d6,(a0,d0.l*2)          ; ceilingclip[rw_x] = mid
        bra.b   15$

12$     tst.l   _markceiling(a4)        ; else if (markceiling)
        beq.b   15$
14$     subq.l  #1,d7                   ; d7 = yl - 1
        move.l  a2,d0                   ; d0 = rw_x
        lea     _ceilingclip(a4),a0
        move.w  d7,(a0,d0.l*2)          ; ceilingclip[rw_x] = yl - 1

15$     move.l  _bottomtexture(a4),d0   ; if (bottomtexture)
        beq.b   16$
        move.l  _pixlow(a4),d6          ; d6 = pixlow
        move.l  d6,d1                   ; d1 = pixlow
        lea     _ceilingclip(a4),a0     ; a0 -> ceilingclip
        add.l   _pixlowstep(a4),d1      ; d1 = pixlow + pixlowstep
        subq.l  #1,d6                   ; d6 = pixlow - 1
        move.l  d1,_pixlow(a4)          ; pixlow += pixlowstep
        asr.l   #8,d6                   ; d6 = (pixlow - 1) >> 8
        move.l  a2,d1                   ; d1 = rw_x
        asr.l   #4,d6                   ; d6 = (pixlow - 1) >> 12
        move.w  (a0,d1.l*2),d1          ; d1.w = ceilingclip[rw_x]
        addq.l  #1,d6                   ; d6 = mid = (pixlow + (1 << 12) - 1) >> 12
        ext.l   d1                      ; d1 = ceilingclip[rw_x]
        cmp.l   d1,d6                   ; if (mid <= ceilingclip[rw_x])
        bgt.b   17$
        move.l  d1,d6
        addq.l  #1,d6                   ; d6 = mid = ceilingclip[rw_x] + 1
17$     cmp.l   d3,d6                   ; if (mid <= yh)
        bgt.b   18$
        move.l  d6,_dc_yl(a4)           ; dc_yl = mid
        move.l  d3,_dc_yh(a4)           ; dc_yh = yh
        move.l  _rw_bottomtexturemid(a4),_dc_texturemid(a4)
        move.l  d5,d1                   ; d1 = texturecolumn, d0 = bottomtexture
        jsr     (@R_GetColumn)
        move.l  d0,_dc_source(a4)       ; dc_source = R_GetColumn(d0,d1)
        movea.l _colfunc(a4),a0
        jsr     (a0)                    ; colfunc ()
        move.l  a2,d0                   ; d0 = rw_x
        lea     _floorclip(a4),a0
        move.w  d6,(a0,d0.l*2)          ; floorclip[rw_x] = mid
        bra.b   19$

16$     tst.l   _markfloor(a4)          ; else if (markfloor)
        beq.b   19$
18$     addq.l  #1,d3                   ; d3 = yh + 1
        move.l  a2,d0                   ; d0 = rw_x
        lea     _floorclip(a4),a0
        move.w  d3,(a0,d0.l*2)          ; floorclip[rw_x] = yh + 1

19$     tst.l   _maskedtexture(a4)      ; if (maskedtexture)
        beq.b   11$
        move.l  a2,d0                   ; d0 = rw_x
        movea.l _maskedtexturecol(a4),a0
        move.w  d5,(a0,d0.l*2)          ; maskedtexturecol[rw_x] = texturecolumn

11$     adda.l  _rw_scalestep(a4),a6    ; rw_scale += rw_scalestep
        adda.l  _topstep(a4),a3         ; topfrac += topstep
        adda.l  _bottomstep(a4),a5      ; bottomfrac += bottomstep
        addq.l  #1,a2                   ; rw_x++
1$      cmpa.l  _rw_stopx(a4),a2
        blt.w   20$

        move.l  a2,_rw_x(a4)
        move.l  a3,_topfrac(a4)
        move.l  a5,_bottomfrac(a4)
        move.l  a6,_rw_scale(a4)
        movem.l (sp)+,d2-d7/a2-a6
        rts

;***********************************************************************

        end